* CLEAN THE 5-YEAR TRAJECTORIES DATA
*-------------------------------------------------------------------------------

use "$data/initial_data/Observed Trajectories (5-year FU)", clear

* clean EDUCATION
*-------------------------------------------------------------------------------
* Implied age at leaving school: 6 plus years of education (presumably 6 is the
* school starting age -- confirm). Missing whenever edu_years is missing.
gen age_schooldropout=6+edu_years 

* An 18-year-old with edu_any_now==0 cannot have an implied leaving age of 19:
* pull it back to the current age.
replace age_schooldropout=18 if age_now==18 & age_schooldropout==19 & edu_any_now==0

* For edu_other_now==1 the leaving age is raised to at least the current age and
* capped at 19. min() and max() ignore missing arguments, so without the extra
* guard a row with BOTH age_schooldropout and age_now missing would be assigned
* a fabricated leaving age of 19; such rows are now left missing.
* NOTE(review): when only age_schooldropout is missing this still assigns
* min(19, age_now), as in the original -- confirm that is intended.
replace age_schooldropout=min(19,max(age_schooldropout,age_now)) if edu_other_now==1 & (age_schooldropout<. | age_now<.)

* inschoolX = 1 if the leaving age is above X, 0 otherwise. It is defined only
* for respondents who are already older than X and whose leaving age is known.
* Stata treats missing as larger than any number, so "age_now > X" alone would
* also be true when age_now is missing; the explicit non-missing check keeps the
* indicator missing for respondents of unknown age.
forvalues x = 6/23 {
	gen inschool`x'=(age_schooldropout>`x') if age_now>`x' & age_now<. & age_schooldropout<.
}

* clean AGE OF MARRIAGE
*-------------------------------------------------------------------------------
/*
NB. We have four different age of marriage variables (with different patterns of missings) and no particular prior on which one to favor. We have: age of marriage as reported by daughter, age of marriage as reported by carer, age of marriage imputed from daughter's report of the year of marriage, age of marriage imputed from daughter's report of the length of marriage and current age. 
*/	

* cap each at current age
* (a reported age at marriage above the current age is replaced by the current
* age; missing reports stay missing, and nothing changes when age_now is missing)
foreach v of varlist age_marr age_marr_c age_marr2 age_marr3 {
	replace `v'=age_now if `v'>age_now & `v'<.
}

* take the median, no clear one to favor
* rowmedian() ignores missing values, so the median is taken over whichever
* reports are available in each row; floor() rounds any non-integer result
* (e.g. a median falling between two reported ages) down to a whole age.
egen age_marr_analysis=rowmedian(age_marr age_marr_c age_marr2 age_marr3)
replace age_marr_analysis=floor(age_marr_analysis)

* create indicators for whether married at each age
* marriedX is defined only for respondents already older than X:
*   - starts at 0 when married_any==0;
*   - is then set to (analysis age at marriage <= X) whenever that age is known,
*     which overrides the married_any==0 default.
* Stata treats missing as larger than any number, so "age_now > X" alone would
* also be true when age_now is missing; the explicit non-missing check keeps the
* indicator missing for respondents of unknown age.
forvalues x = 6/23 {
	gen married`x'=0 if married_any==0 & age_now>`x' & age_now<.
	replace married`x'=(age_marr_analysis<=`x') if age_now>`x' & age_now<. & age_marr_analysis<.
}
	
	
* save cleaned data
*-------------------------------------------------------------------------------
* Writes the dataset currently in memory (including the variables generated
* above) to the created_data folder under the $data global macro; the replace
* option overwrites any existing file with the same name.
save "$data/created_data/Observed Trajectories (5-year FU) -- cleaned", replace